%% This BibTeX bibliography file was created using BibDesk.
%% http://bibdesk.sourceforge.net/


%% Created for Lei Zhao at 2011-07-04 18:34:53 -0500 


%% Saved with string encoding Unicode (UTF-8) 



%% RFC 4627 (the application/json media type), cited as a technical report.
%% The RFC number and month live in their own fields instead of Institution.
@techreport{crockford2006application,
	Author = {Crockford, D.},
	Title = {The application/json Media Type for {JavaScript} Object Notation ({JSON})},
	Type = {RFC},
	Number = {4627},
	Institution = {Internet Engineering Task Force},
	Year = {2006},
	Month = jul,
	Date-Added = {2011-04-21 00:35:25 -0500},
	Date-Modified = {2011-04-21 02:07:10 -0500}}

%% Reference for the colorspace R package; the cited version is recorded in Note.
@manual{Ihaka:2009aa,
	Author = {Ihaka, R. and Murrell, P. and Hornik, K. and Zeileis, A.},
	Title = {{colorspace}: color space manipulation},
	Year = {2009},
	Note = {R package version 1.0-1},
	Url = {http://CRAN.R-project.org/package=colorspace},
	Date-Added = {2011-04-20 22:33:16 -0500},
	Date-Modified = {2011-04-21 02:09:39 -0500},
	Bdsk-Url-1 = {http://CRAN.R-project.org/package=colorspace}}

%% Journal article on colour selection for statistical graphics.
%% The coined word RGBland is braced as a whole so styles cannot re-case part of it.
@article{Zeileis:2009aa,
	Author = {Zeileis, A. and Hornik, K. and Murrell, P.},
	Title = {Escaping {RGBland}: Selecting colors for statistical graphics},
	Journal = {Computational Statistics \& Data Analysis},
	Volume = {53},
	Pages = {3259--3270},
	Year = {2009},
	Doi = {10.1016/j.csda.2008.11.033},
	Date-Added = {2011-04-20 21:23:08 -0500},
	Date-Modified = {2011-04-21 02:13:50 -0500},
	Bdsk-Url-1 = {http://dx.doi.org/10.1016/j.csda.2008.11.033}}

%% Reference for R itself; the corporate author is double-braced so it is
%% treated as one indivisible name.
@manual{R2009aa,
	Author = {{R Development Core Team}},
	Title = {R: a language and environment for statistical computing},
	Organization = {R Foundation for Statistical Computing},
	Address = {Vienna, Austria},
	Year = {2009},
	Note = {{ISBN} 3-900051-07-0},
	Url = {http://www.R-project.org},
	Date-Added = {2011-04-20 16:16:22 -0500},
	Date-Modified = {2011-04-21 02:13:17 -0500},
	Bdsk-Url-1 = {http://www.R-project.org}}

%% Biometrika article; page and issue ranges use the double-hyphen (en-dash) form.
@article{GOWER01121966,
	Author = {Gower, J. C.},
	Title = {Some distance properties of latent root and vector methods used in multivariate analysis},
	Journal = {Biometrika},
	Volume = {53},
	Number = {3--4},
	Pages = {325--338},
	Year = {1966},
	Doi = {10.1093/biomet/53.3-4.325},
	Url = {http://biomet.oxfordjournals.org/content/53/3-4/325.abstract},
	Eprint = {http://biomet.oxfordjournals.org/content/53/3-4/325.full.pdf+html},
	Date-Modified = {2011-04-21 02:09:09 -0500},
	Bdsk-Url-1 = {http://biomet.oxfordjournals.org/content/53/3-4/325.abstract},
	Bdsk-Url-2 = {http://dx.doi.org/10.1093/biomet/53.3-4.325}}

%% Journal of Theoretical Biology article. The Doi field holds the bare DOI
%% (no "DOI:" label) so styles can build a working resolver link from it.
@article{LIN2002361,
	Author = {Lin, K. and May, A. C. W. and Taylor, W. R.},
	Title = {Amino acid encoding schemes from protein structure alignments: multi-dimensional vectors to describe residue types},
	Journal = {Journal of Theoretical Biology},
	Volume = {216},
	Number = {3},
	Pages = {361--365},
	Year = {2002},
	Issn = {0022-5193},
	Doi = {10.1006/jtbi.2001.2512},
	Url = {http://www.sciencedirect.com/science/article/B6WMD-46HNPTY-M/2/eb62f5566f7f581ffeee829118e02782},
	Date-Modified = {2011-04-21 02:10:30 -0500},
	Bdsk-Url-1 = {http://www.sciencedirect.com/science/article/B6WMD-46HNPTY-M/2/eb62f5566f7f581ffeee829118e02782},
	Bdsk-Url-2 = {http://dx.doi.org/10.1006/jtbi.2001.2512}}

%% Nature Methods review of alignment and tree visualization software.
%% The Annote field keeps the PubMed record text with the exported HTML markup removed;
%% Month is taken from the "2010 Mar" date in that record.
@article{Procter2010aa,
	Author = {Procter, J. B. and Thompson, J. and Letunic, I. and Creevey, C. and Jossinet, F. and Barton, G. J.},
	Title = {Visualization of multiple alignments, phylogenies and gene family evolution},
	Journal = {Nature Methods},
	Volume = {7},
	Number = {3 Suppl},
	Pages = {S16--S25},
	Year = {2010},
	Month = mar,
	Issn = {1548-7091},
	Url = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&dopt=Citation&list_uids=20195253},
	Abstract = {Software for visualizing sequence alignments and trees are essential tools for life scientists. In this review, we describe the major features and capabilities of a selection of stand-alone and web-based applications useful when investigating the function and evolution of a gene family. These range from simple viewers, to systems that provide sophisticated editing and analysis functions. We conclude with a discussion of the challenges that these tools now face due to the flood of next generation sequence data and the increasingly complex network of bioinformatics information sources.},
	Annote = {Procter, James B; Thompson, Julie; Letunic, Ivica; Creevey, Chris; Jossinet, Fabrice; Barton, Geoffrey J. Research Support, Non-U.S. Gov't. Review. United States. Nature methods. Nat Methods. 2010 Mar;7(3 Suppl):S16-25.},
	Date-Added = {2011-04-19 19:32:13 -0500},
	Date-Modified = {2011-04-21 02:27:37 -0500},
	Bdsk-Url-1 = {http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Retrieve&db=PubMed&dopt=Citation&list_uids=20195253}}

%% Review article on multiple sequence alignment methods and benchmarks.
%% The abstract is plain text: HTML paragraph/line-break residue from the export is removed.
@article{Edgar:2006aa,
	Author = {Edgar, R. C. and Batzoglou, S.},
	Title = {Multiple sequence alignment},
	Journal = {Current Opinion in Structural Biology},
	Volume = {16},
	Number = {3},
	Pages = {368--373},
	Year = {2006},
	Issn = {0959-440X},
	Doi = {10.1016/j.sbi.2006.04.004},
	Url = {http://www.sciencedirect.com/science/article/B6VS6-4JWFGTG-2/2/02ab69ca5aac71752b4e2abebd686333},
	Abstract = {Multiple sequence alignments are an essential tool for protein structure and function prediction, phylogeny inference and other common tasks in sequence analysis. Recently developed systems have advanced the state of the art with respect to accuracy, ability to scale to thousands of proteins and flexibility in comparing proteins that do not share the same domain architecture. New multiple alignment benchmark databases include {PREFAB}, {SABMARK}, {OXBENCH} and {IRMBASE}. Although {CLUSTALW} is still the most popular alignment tool to date, recent methods offer significantly better alignment quality and, in some cases, reduced computational cost.},
	Date-Added = {2011-04-19 19:47:20 -0500},
	Date-Modified = {2011-04-21 02:15:29 -0500},
	Bdsk-Url-1 = {http://www.sciencedirect.com/science/article/B6VS6-4JWFGTG-2/2/02ab69ca5aac71752b4e2abebd686333},
	Bdsk-Url-2 = {http://dx.doi.org/10.1016/j.sbi.2006.04.004}}

%% Article introducing the GUIDANCE alignment confidence score.
%% The page range has no stray space before the double hyphen.
@article{Penn:2010aa,
	Author = {Penn, O. and Privman, E. and Landan, G. and Graur, D. and Pupko, T.},
	Title = {An alignment confidence score capturing robustness to guide tree uncertainty},
	Journal = {Molecular Biology and Evolution},
	Volume = {27},
	Number = {8},
	Pages = {1759--1767},
	Year = {2010},
	Doi = {10.1093/molbev/msq066},
	Url = {http://mbe.oxfordjournals.org/content/27/8/1759.abstract},
	Abstract = {Multiple sequence alignment {(MSA)} is the basis for a wide range of comparative sequence analyses from molecular phylogenetics to {3D} structure prediction. Sophisticated algorithms have been developed for sequence alignment, but in practice, many errors can be expected and extensive portions of the {MSA} are unreliable. Hence, it is imperative to understand and characterize the various sources of errors in {MSAs} and to quantify site-specific alignment confidence. In this paper, we show that uncertainties in the guide tree used by progressive alignment methods are a major source of alignment uncertainty. We use this insight to develop a novel method for quantifying the robustness of each alignment column to guide tree uncertainty. We build on the widely used bootstrap method for perturbing the phylogenetic tree. Specifically, we generate a collection of trees and use each as a guide tree in the alignment algorithm, thus producing a set of {MSAs.} We next test the consistency of every column of the {MSA} obtained from the unperturbed guide tree with respect to the set of {MSAs.} We name this measure the {``GUIDe} tree based {AligNment} {ConfidencE''} {(GUIDANCE)} score. Using the Benchmark Alignment data {BASE} benchmark as well as simulation studies, we show that {GUIDANCE} scores accurately identify errors in {MSAs.} Additionally, we compare our results with the previously published {Heads-or-Tails} score and show that the {GUIDANCE} score is a better predictor of unreliably aligned regions.},
	Date-Added = {2011-04-19 21:58:04 -0500},
	Date-Modified = {2011-04-21 02:12:30 -0500},
	Bdsk-Url-1 = {http://mbe.oxfordjournals.org/content/27/8/1759.abstract},
	Bdsk-Url-2 = {http://dx.doi.org/10.1093/molbev/msq066}}

%% Article describing the GUIDANCE web server.
%% Only the acronym is brace-protected; the colon stays outside the braces.
@article{Penn:2010ab,
	Author = {Penn, O. and Privman, E. and Ashkenazy, H. and Landan, G. and Graur, D. and Pupko, T.},
	Title = {{GUIDANCE}: a web server for assessing alignment confidence scores},
	Journal = {Nucleic Acids Research},
	Volume = {38},
	Number = {Web Server},
	Pages = {W23--W28},
	Year = {2010},
	Issn = {0305-1048},
	Doi = {10.1093/nar/gkq443},
	Url = {http://nar.oxfordjournals.org/content/38/suppl_2/W23.short},
	Shorttitle = {{GUIDANCE}},
	Date-Added = {2011-04-19 21:59:19 -0500},
	Date-Modified = {2011-04-21 02:16:13 -0500},
	Bdsk-Url-1 = {http://nar.oxfordjournals.org/content/38/suppl_2/W23.short},
	Bdsk-Url-2 = {http://dx.doi.org/10.1093/nar/gkq443}}

%% Book reference on multidimensional scaling (Springer, 1997).
@book{Borg:1997aa,
	Author = {Borg, Ingwer and Groenen, Patrick J. F.},
	Title = {Modern multidimensional scaling: theory and applications},
	Publisher = {Springer},
	Year = {1997},
	Isbn = {9780387948454},
	Shorttitle = {Modern multidimensional scaling},
	Date-Added = {2011-04-19 23:03:00 -0500},
	Date-Modified = {2011-04-19 23:03:00 -0500}}

%% Article on the development of the CIE 1976 (L* a* b*) colour space.
%% The title uses an ASCII hyphen in "colour-difference" and braces only whole tokens.
@article{McLAREN:1976aa,
	Author = {{McLaren}, K.},
	Title = {{XIII}---{The} development of the {CIE} 1976 {(L* a* b*)} uniform colour space and colour-difference formula},
	Journal = {Journal of the Society of Dyers and Colourists},
	Volume = {92},
	Number = {9},
	Pages = {338--341},
	Year = {1976},
	Issn = {1478-4408},
	Doi = {10.1111/j.1478-4408.1976.tb03301.x},
	Url = {http://onlinelibrary.wiley.com/doi/10.1111/j.1478-4408.1976.tb03301.x/abstract},
	Date-Added = {2011-04-20 14:22:30 -0500},
	Date-Modified = {2011-04-21 02:16:05 -0500},
	Bdsk-Url-1 = {http://onlinelibrary.wiley.com/doi/10.1111/j.1478-4408.1976.tb03301.x/abstract},
	Bdsk-Url-2 = {http://dx.doi.org/10.1111/j.1478-4408.1976.tb03301.x}}

%% The 1996 journal article introducing R.
%% Authors are given in the published order (Ihaka first); the citation key is
%% deliberately left unchanged so existing citations still resolve.
@article{Gentleman:aa,
	Author = {Ihaka, R. and Gentleman, R.},
	Title = {{R}: a language for data analysis and graphics},
	Journal = {Journal of Computational and Graphical Statistics},
	Volume = {5},
	Number = {3},
	Pages = {299--314},
	Year = {1996},
	Doi = {10.2307/1390807},
	Shorttitle = {R},
	Date-Added = {2011-04-20 16:10:51 -0500},
	Date-Modified = {2011-04-21 02:16:01 -0500},
	Bdsk-Url-1 = {http://dx.doi.org/10.2307/1390807}}

%% Psychometrika article on the additive constant problem.
@article{Cailliez:1983aa,
	Author = {Cailliez, F.},
	Title = {The analytical solution of the additive constant problem},
	Journal = {Psychometrika},
	Volume = {48},
	Number = {2},
	Pages = {305--308},
	Year = {1983},
	Issn = {0033-3123},
	Doi = {10.1007/BF02294026},
	Url = {http://www.springerlink.com/content/l5l10305872578k7/},
	Date-Added = {2011-04-20 16:18:29 -0500},
	Date-Modified = {2011-04-21 02:14:58 -0500},
	Bdsk-Url-1 = {http://www.springerlink.com/content/l5l10305872578k7/},
	Bdsk-Url-2 = {http://dx.doi.org/10.1007/BF02294026}}

%% Chapter in the Handbook of Data Visualization.
%% The publisher name is kept separate from the city, which lives in Address.
@incollection{Cox:2008aa,
	Author = {Cox, M. A. A. and Cox, T. F.},
	Title = {Multidimensional scaling},
	Booktitle = {Handbook of Data Visualization},
	Pages = {315--347},
	Publisher = {Springer},
	Address = {Berlin, Heidelberg},
	Year = {2008},
	Isbn = {978-3-540-33036-3},
	Url = {http://www.springerlink.com/content/v6j14w058h7n4407/},
	Date-Added = {2011-04-20 16:19:41 -0500},
	Date-Modified = {2011-04-21 02:07:26 -0500},
	Bdsk-Url-1 = {http://www.springerlink.com/content/v6j14w058h7n4407/}}

%% Proceedings volume with a corporate author.
%% The author is double-braced so it is treated as a single name, and the
%% publisher's city is in Address rather than appended to Publisher.
@book{CIE:1932aa,
	Author = {{C.I.E.}},
	Title = {{Commission Internationale de l'Eclairage} proceedings, 1931},
	Publisher = {Cambridge University Press},
	Address = {Cambridge},
	Year = {1932},
	Date-Added = {2011-04-20 17:00:58 -0500},
	Date-Modified = {2011-04-21 02:21:09 -0500}}

%% Article on the C.I.E. colorimetric standards, Transactions of the Optical Society.
%% NOTE(review): co-author J. Guild added from the published record; confirm against the DOI.
@article{Smith:1931aa,
	Author = {Smith, T. and Guild, J.},
	Title = {The {C.I.E.} colorimetric standards and their use},
	Journal = {Transactions of the Optical Society},
	Volume = {33},
	Number = {3},
	Pages = {73--134},
	Year = {1931},
	Doi = {10.1088/1475-4878/33/3/301},
	Date-Added = {2011-04-20 17:01:12 -0500},
	Date-Modified = {2011-04-21 02:04:14 -0500},
	Bdsk-Url-1 = {http://dx.doi.org/10.1088/1475-4878/33/3/301}}

%% Book on the LAB colorspace in Photoshop.
%% Publisher and its location are separate fields so styles can format each correctly.
@book{Margulis:2005aa,
	Author = {Margulis, D.},
	Title = {Photoshop {LAB} color: The canyon conundrum and other adventures in the most powerful colorspace},
	Publisher = {Peachpit Press},
	Address = {Berkeley, CA, USA},
	Year = {2005},
	Isbn = {0321356780},
	Shorttitle = {Photoshop {LAB} color},
	Date-Added = {2011-04-20 17:03:59 -0500},
	Date-Modified = {2011-04-20 17:03:59 -0500}}

%% Article on a limited memory algorithm for bound constrained optimization.
%% NOTE(review): end page, issue number and DOI were added from the published record; confirm.
@article{Byrd:1995aa,
	Author = {Byrd, R. H. and Lu, P. and Nocedal, J. and Zhu, C.},
	Title = {A limited memory algorithm for bound constrained optimization},
	Journal = {{SIAM} Journal on Scientific Computing},
	Volume = {16},
	Number = {5},
	Pages = {1190--1208},
	Year = {1995},
	Doi = {10.1137/0916069},
	Date-Added = {2011-04-20 22:41:37 -0500},
	Date-Modified = {2011-04-21 02:06:48 -0500}}

%% Current Protocols in Bioinformatics unit on ClustalW and ClustalX.
%% The title carries no trailing period: styles add their own, so a braced
%% period would print twice.
@article{Thompsonaa,
	Author = {Thompson, J. D. and Gibson, T. J. and Higgins, D. G.},
	Title = {Multiple sequence alignment using {ClustalW} and {ClustalX}},
	Journal = {Current Protocols in Bioinformatics},
	Pages = {2.3},
	Year = {2002},
	Issn = {1934-340X},
	Date-Added = {2011-04-20 22:44:03 -0500},
	Date-Modified = {2011-04-21 02:19:33 -0500}}

%% Article describing the simplex method for function minimization.
%% The page range has no stray space before the double hyphen.
@article{Nelder:1965aa,
	Author = {Nelder, J. A. and Mead, R.},
	Title = {A simplex method for function minimization},
	Journal = {The Computer Journal},
	Volume = {7},
	Number = {4},
	Pages = {308--313},
	Year = {1965},
	Doi = {10.1093/comjnl/7.4.308},
	Url = {http://comjnl.oxfordjournals.org/content/7/4/308.abstract},
	Abstract = {A method is described for the minimization of a function of n variables, which depends on the comparison of function values at the (n + 1) vertices of a general simplex, followed by the replacement of the vertex with the highest value by another point. The simplex adapts itself to the local landscape, and contracts on to the final minimum. The method is shown to be effective and computationally compact. A procedure is given for the estimation of the Hessian matrix in the neighbourhood of the minimum, needed in statistical estimation problems.},
	Date-Added = {2011-04-21 01:22:48 -0500},
	Date-Modified = {2011-04-21 02:16:09 -0500},
	Bdsk-Url-1 = {http://comjnl.oxfordjournals.org/content/7/4/308.abstract},
	Bdsk-Url-2 = {http://dx.doi.org/10.1093/comjnl/7.4.308}}

%% Article on function minimization by conjugate gradients.
%% The page range has no stray space before the double hyphen.
@article{Fletcher:1964aa,
	Author = {Fletcher, R. and Reeves, C. M.},
	Title = {Function minimization by conjugate gradients},
	Journal = {The Computer Journal},
	Volume = {7},
	Number = {2},
	Pages = {149--154},
	Year = {1964},
	Doi = {10.1093/comjnl/7.2.149},
	Url = {http://comjnl.oxfordjournals.org/content/7/2/149.abstract},
	Abstract = {A quadratically convergent gradient method for locating an unconstrained local minimum of a function of several variables is described. Particular advantages are its simplicity and its modest demands on storage, space for only three vectors being required. An {ALGOL} procedure is presented, and the paper includes a discussion of results obtained by its used on various test functions.},
	Date-Added = {2011-04-21 01:23:46 -0500},
	Date-Modified = {2011-04-21 02:15:34 -0500},
	Bdsk-Url-1 = {http://comjnl.oxfordjournals.org/content/7/2/149.abstract},
	Bdsk-Url-2 = {http://dx.doi.org/10.1093/comjnl/7.2.149}}

%% Article on convergence of simulated annealing algorithms.
%% The space name is written as braced math so styles cannot lower-case it to "rd";
%% the abstract is plain text with the exported HTML paragraph tag removed.
@article{Belisle:1992aa,
	Author = {B{\'e}lisle, C. J. P.},
	Title = {Convergence theorems for a class of simulated annealing algorithms on {$R^d$}},
	Journal = {Journal of Applied Probability},
	Volume = {29},
	Number = {4},
	Pages = {885--895},
	Year = {1992},
	Issn = {0021-9002},
	Doi = {10.2307/3214721},
	Url = {http://www.jstor.org/stable/3214721},
	Abstract = {We study a class of simulated annealing algorithms for global minimization of a continuous function defined on a subset of $R^d$. We consider the case where the selection Markov kernel is absolutely continuous and has a density which is uniformly bounded away from 0. This class includes certain simulated annealing algorithms recently introduced by various authors. We show that, under mild conditions, the sequence of states generated by these algorithms converges in probability to the global minimum of the function. Unlike most previous studies where the cooling schedule is deterministic, our cooling schedule is allowed to be adaptive. We also address the issue of almost sure convergence versus convergence in probability.},
	Date-Added = {2011-04-21 01:24:26 -0500},
	Date-Modified = {2011-04-21 02:27:28 -0500},
	Bdsk-Url-1 = {http://www.jstor.org/stable/3214721},
	Bdsk-Url-2 = {http://dx.doi.org/10.2307/3214721}}

%% Nucleic Acids Research database-issue article describing GenBank.
@article{Benson:2007lr,
	Author = {Benson, D. A. and {Karsch-Mizrachi}, I. and Lipman, D. J. and Ostell, J. and Wheeler, D. L.},
	Title = {{GenBank}},
	Journal = {Nucleic Acids Research},
	Volume = {36},
	Number = {Database},
	Pages = {D25--D30},
	Year = {2007},
	Month = dec,
	Issn = {0305-1048},
	Doi = {10.1093/nar/gkm929},
	Url = {http://nar.oxfordjournals.org/content/36/suppl_1/D25.full},
	Date-Added = {2011-06-26 00:17:37 -0500},
	Date-Modified = {2011-06-26 00:17:37 -0500},
	Bdsk-Url-1 = {http://nar.oxfordjournals.org/content/36/suppl_1/D25.full},
	Bdsk-Url-2 = {http://dx.doi.org/10.1093/nar/gkm929}}

%% Bioinformatics application note for Jalview Version 2.
%% The page range is normalised, and the abstract's section headings
%% (Availability, Contact) are separated from the sentences they were fused to.
@article{Waterhouse:2009fk,
	Author = {Waterhouse, Andrew M. and Procter, James B. and Martin, David M. A. and Clamp, Mich{\`e}le and Barton, Geoffrey J.},
	Title = {{Jalview} Version 2---a multiple sequence alignment editor and analysis workbench},
	Journal = {Bioinformatics},
	Volume = {25},
	Number = {9},
	Pages = {1189--1191},
	Year = {2009},
	Month = may,
	Doi = {10.1093/bioinformatics/btp033},
	Url = {http://bioinformatics.oxfordjournals.org/content/25/9/1189.abstract},
	Abstract = {Summary: Jalview Version 2 is a system for interactive {WYSIWYG} editing, analysis and annotation of multiple sequence alignments. Core features include keyboard and mouse-based editing, multiple views and alignment overviews, and linked structure display with Jmol. Jalview 2 is available in two forms: a lightweight Java applet for use in web applications, and a powerful desktop application that employs web services for sequence alignment, secondary structure prediction and the retrieval of alignments, sequences, annotation and structures from public databases and any {DAS} 1.53 compliant sequence or annotation server. Availability: The Jalview 2 Desktop application and {JalviewLite} applet are made freely available under the {GPL}, and can be downloaded from www.jalview.org. Contact: g.j.barton@dundee.ac.uk},
	Date-Added = {2011-06-26 02:24:24 -0500},
	Date-Modified = {2011-06-26 02:24:24 -0500},
	Bdsk-Url-1 = {http://bioinformatics.oxfordjournals.org/content/25/9/1189.abstract},
	Bdsk-Url-2 = {http://dx.doi.org/10.1093/bioinformatics/btp033}}

%% Nomenclature recommendations for incompletely specified bases.
%% The title carries no trailing period (styles add their own), and the
%% author's initial is punctuated like the other entries in this file.
@article{Cornish-Bowden:1985lr,
	Author = {{Cornish-Bowden}, A.},
	Title = {Nomenclature for incompletely specified bases in nucleic acid sequences: recommendations 1984},
	Journal = {Nucleic Acids Research},
	Volume = {13},
	Number = {9},
	Pages = {3021--3030},
	Year = {1985},
	Month = may,
	Note = {{PMID:} 2582368 {PMCID:} 341218},
	Issn = {0305-1048},
	Shorttitle = {Nomenclature for incompletely specified bases in nucleic acid sequences},
	Date-Added = {2011-07-02 18:07:57 -0500},
	Date-Modified = {2011-07-02 18:07:57 -0500}}

%% Article presenting version 3.0 of the BAliBASE alignment benchmark.
@article{Thompson:2005fk,
	Author = {Thompson, Julie D and Koehl, Patrice and Ripp, Raymond and Poch, Olivier},
	Title = {{BAliBASE} 3.0: Latest developments of the multiple sequence alignment benchmark},
	Journal = {Proteins: Structure, Function, and Bioinformatics},
	Volume = {61},
	Number = {1},
	Pages = {127--136},
	Year = {2005},
	Month = oct,
	Issn = {1097-0134},
	Doi = {10.1002/prot.20527},
	Url = {http://onlinelibrary.wiley.com/doi/10.1002/prot.20527/abstract},
	Keywords = {alignment accuracy, alignment reliability, program comparison, program evaluation, reference alignment, structure superposition},
	Shorttitle = {{BAliBASE} 3.0},
	Abstract = {Multiple sequence alignment is one of the cornerstones of modern molecular biology. It is used to identify conserved motifs, to determine protein domains, in {2D/3D} structure prediction by homology and in evolutionary studies. Recently, high-throughput technologies such as genome sequencing and structural proteomics have lead to an explosion in the amount of sequence and structure information available. In response, several new multiple alignment methods have been developed that improve both the efficiency and the quality of protein alignments. Consequently, the benchmarks used to evaluate and compare these methods must also evolve. We present here the latest release of the most widely used multiple alignment benchmark, {BAliBASE}, which provides high quality, manually refined, reference alignments based on {3D} structural superpositions. Version 3.0 of {BAliBASE} includes new, more challenging test cases, representing the real problems encountered when aligning large sets of complex sequences. Using a novel, semiautomatic update protocol, the number of protein families in the benchmark has been increased and representative test cases are now available that cover most of the protein fold space. The total number of proteins in {BAliBASE} has also been significantly increased from 1444 to 6255 sequences. In addition, full-length sequences are now provided for all test cases, which represent difficult cases for both global and local alignment programs. Finally, the {BAliBASE} Web site (http://www-bio3d-igbmc.u-strasbg.fr/balibase) has been completely redesigned to provide a more user-friendly, interactive interface for the visualization of the {BAliBASE} reference alignments and the associated annotations. Proteins 2005. {\copyright} 2005 {Wiley-Liss}, Inc.},
	Date-Added = {2011-07-03 23:29:23 -0500},
	Date-Modified = {2011-07-03 23:29:23 -0500}}

%% BMC Bioinformatics article on split-and-combine MDS for large data sets.
%% The abstract is a single line with its section headings (BACKGROUND, RESULTS,
%% CONCLUSION) separated from the neighbouring sentences and the complexity
%% orders written as math. NOTE(review): exponents restored from "N2"/"N3"; confirm.
@article{Tzeng:2008qy,
	Author = {Tzeng, Jengnan and Lu, Henry and Li, {Wen-Hsiung}},
	Title = {Multidimensional scaling for large genomic data sets},
	Journal = {{BMC} Bioinformatics},
	Volume = {9},
	Number = {1},
	Pages = {179},
	Year = {2008},
	Issn = {1471-2105},
	Doi = {10.1186/1471-2105-9-179},
	Url = {http://www.biomedcentral.com/1471-2105/9/179},
	Abstract = {BACKGROUND: Multi-dimensional scaling ({MDS}) is aimed to represent high dimensional data in a low dimensional space with preservation of the similarities between data points. This reduction in dimensionality is crucial for analyzing and revealing the genuine structure hidden in the data. For noisy data, dimension reduction can effectively reduce the effect of noise on the embedded structure. For large data set, dimension reduction can effectively reduce information retrieval complexity. Thus, {MDS} techniques are used in many applications of data mining and gene network research. However, although there have been a number of studies that applied {MDS} techniques to genomics research, the number of analyzed data points was restricted by the high computational complexity of {MDS}. In general, a non-metric {MDS} method is faster than a metric {MDS}, but it does not preserve the true relationships. The computational complexity of most metric {MDS} methods is over $O(N^2)$, so that it is difficult to process a data set of a large number of genes N, such as in the case of whole genome microarray data. RESULTS: We developed a new rapid metric {MDS} method with a low computational complexity, making metric {MDS} applicable for large data sets. Computer simulation showed that the new method of split-and-combine {MDS} ({SC-MDS}) is fast, accurate and efficient. Our empirical studies using microarray data on the yeast cell cycle showed that the performance of K-means in the reduced dimensional space is similar to or slightly better than that of K-means in the original space, but about three times faster to obtain the clustering results. Our clustering results using {SC-MDS} are more stable than those in the original space. Hence, the proposed {SC-MDS} is useful for analyzing whole genome data. CONCLUSION: Our new method reduces the computational complexity from $O(N^3)$ to $O(N)$ when the dimension of the feature space is far less than the number of genes N, and it successfully reconstructs the low dimensional representation as does the classical {MDS}. Its performance depends on the grouping method and the minimal number of the intersection points between groups. Feasible methods for grouping methods are suggested; each group must contain both neighboring and far apart data points. Our method can represent high dimensional large data set in a low dimensional space not only efficiently but also effectively.},
	Date-Added = {2011-07-04 18:34:46 -0500},
	Date-Modified = {2011-07-04 18:34:46 -0500}}
